library(haven)
library(foreign)
library(usmap) #import the package
library(ggplot2) #use ggplot2 to add layer for visualization
library(tidyverse)
library(maps)
library(data.table)
library(ggrepel) # if need to repel labels 
library(mapproj)
install.packages("devtools")
devtools::install_github("UrbanInstitute/urbnmapr")
library(urbnmapr)
library(geosphere)
library(sf)
library(dplyr)

#Set working directory
setwd("")


#### National Vote Share CI ####
simulations <- read_dta("SimulationData/histogram_data_2024_all_10000.dta")
ec_votes <- read_dta("Electoral_Votes_Final.dta")

percentiles <- apply(simulations[,1:51], 2, quantile, probs = c(0.025,0.5, 0.975))

percentiles <- as.data.frame(t(percentiles))
percentiles$cstate_name <- rownames(percentiles)

population <- read_dta("Combined Estimates.dta")
population$cstate_name <- sub(" ", "_", population$cstate_name)
population$cstate_name <- sub(" ", "_", population$cstate_name)

rel_population <- merge(percentiles,population,by="cstate_name")
rel_population$relpop_2.5 <- rel_population$`2.5%`*rel_population$Rel_Pop_2020
rel_population$relpop_50 <- rel_population$`50%`*rel_population$Rel_Pop_2020
rel_population$relpop_97.5 <- rel_population$`97.5%`*rel_population$Rel_Pop_2020

#Popular two-party vote estimate for Harris
sum(rel_population$relpop_50)

#95% Uncertainty Estimate
sum(rel_population$relpop_2.5)
sum(rel_population$relpop_97.5)


#### Map of USA ####
#### Figure 2 ####
predictions <- simulations %>%
  gather(x, value, c(Alabama:Wyoming,Maine_D1,Maine_D2,Nebraska_D1,Nebraska_D2,Nebraska_D3)) %>%
  group_by(x) %>%
  tally(value > 50)

names <- predictions$x
subset <- predictions[,]

predictions2 <- data.frame(x = names,
                           percent_demwin = colMeans(simulations[,c(names)], na.rm = TRUE))

predictions$percent_demwin <- predictions$n/70000


predictions$values <- 0
predictions$values[predictions$percent_demwin>0.98] <- 1
predictions$values[predictions$percent_demwin<=0.98 & predictions$percent_demwin>0.75] <- 2
predictions$values[predictions$percent_demwin<=0.75 & predictions$percent_demwin>0.55] <- 3
predictions$values[predictions$percent_demwin<=0.55 & predictions$percent_demwin>0.45] <- 4
predictions$values[predictions$percent_demwin<=0.45 & predictions$percent_demwin>0.25] <- 5
predictions$values[predictions$percent_demwin<=0.25 & predictions$percent_demwin>0.02] <- 6
predictions$values[predictions$percent_demwin<=0.02] <- 7

predictions$full <- sub("_", " ", predictions$x)
predictions$full <- sub("_", " ", predictions$full)


tester <-urbnmapr::states
test2 <- left_join(predictions, tester, join_by(full==state_name)) 

test3 <- as.data.table(test2)


# Convert to data.table
setDT(test3)

# Re-define centroid function - Lon is first argument and Lat is second
# Geosphere takes a matrix with two columns: Lon|Lat, so we use cbind to coerce the data to this form
findCentroid <- function(Lon, Lat, ...){
  centroid(cbind(Lon, Lat), ...)
}

test4 <- test3[!is.na(lat),]
# Find centroid Lon and Lat by ID, as required
test4 <-test4[, c("Cent_lon", "Cent_lat") := as.list(findCentroid(long, lat)), by = x]

#issue with placement of text in map so moving centroid location manually
test4$Cent_lat[test4$full=="Alaska"] <- 27
test4$Cent_lon[test4$full=="Alaska"] <- -117
test4$Cent_lon[test4$full=="Florida"] <- test4$Cent_lon[test4$full=="Florida"]+1
test4$Cent_lon[test4$full=="Louisiana"] <- test4$Cent_lon[test4$full=="Louisiana"]-0.5
test4$Cent_lon[test4$full=="Michigan"] <- test4$Cent_lon[test4$full=="Michigan"]+2
test4$Cent_lat[test4$full=="Michigan"] <- test4$Cent_lat[test4$full=="Michigan"]-2
test4$Cent_lat[test4$full=="New York"] <- test4$Cent_lat[test4$full=="New York"]-0.5
test4$Cent_lon[test4$full=="Kentucky"] <- test4$Cent_lon[test4$full=="Kentucky"]+0.5



labels1 <- as.data.frame(test4)
labels1 <- unique(as.data.frame(labels1[c("Cent_lon", "Cent_lat","state_abbv","values")]))

labels1$values <- as.factor(labels1$values)
test4$values <- as.factor(test4$values)

labels1 <- merge(labels1,ec_votes,by.x = "state_abbv",by.y="Abbr")
labels1$label <- paste(labels1$state_abbv,labels1$Votes,sep="\n")

separate_labs <- c("MA","RI","CT","NJ","DE","MD","DC","VT","NH")

lab_cols <- data.frame(abbr = labels1$state_abbv[labels1$state_abbv %in% separate_labs],
                       values = labels1$values[labels1$state_abbv %in% separate_labs],
                       labels=labels1$Votes[labels1$state_abbv %in% separate_labs])
lab_cols$labels <- paste(lab_cols$abbr,lab_cols$labels,sep="")


lab_cols$color[lab_cols$values==1] <- "#0066CC"
lab_cols$color[lab_cols$values==2] <- "#7FAEFF"
lab_cols$color[lab_cols$values==3] <- "#C5DBFC"
lab_cols$color[lab_cols$values==4] <- "#D1C7A8"
lab_cols$color[lab_cols$values==5] <- "#F4C1AB"
lab_cols$color[lab_cols$values==6] <- "#F88C79"
lab_cols$color[lab_cols$values==7] <- "#E91D0E"

lab_cols$text_col <- "black"
lab_cols$text_col[lab_cols$values==1 | lab_cols$values == 7] <-"white"


labels1$state_abbv2 <- labels1$state_abbv
labels1$state_abbv2[labels1$state_abbv2 %in% separate_labs] <- ""
labels1$text_col <- "black"
labels1$text_col[labels1$values==1 | labels1$values == 7] <-"white"


# Create DF for CD Votes
cds <- c("Maine D1","Maine D2","Nebraska D1","Nebraska D2","Nebraska D3")
CD_votes <- predictions[predictions$full %in% cds,]

CD_votes$color[CD_votes$values==1] <- "#0066CC"
CD_votes$color[CD_votes$values==2] <- "#7FAEFF"
CD_votes$color[CD_votes$values==3] <- "#C5DBFC"
CD_votes$color[CD_votes$values==4] <- "#D1C7A8"
CD_votes$color[CD_votes$values==5] <- "#F4C1AB"
CD_votes$color[CD_votes$values==6] <- "#F88C79"
CD_votes$color[CD_votes$values==7] <- "#E91D0E"

CD_votes$abbr <- c("ME D1","ME D2","NE D1","NE D2","NE D3")
CD_votes$labels <- paste(CD_votes$abbr," - 1",sep="")
CD_votes$Votes <- 1

demvotes <- sum(labels1$Votes[labels1$values==3 | labels1$values==2 | labels1$values==1])
demvotes <- demvotes + (sum(CD_votes$Votes[CD_votes$values==3 | CD_votes$values==2 | CD_votes$values==1]))
repvotes <- sum(labels1$Votes[labels1$values==7 | labels1$values==6 | labels1$values==5])
repvotes <- repvotes + (sum(CD_votes$Votes[CD_votes$values==7 | CD_votes$values==6 | CD_votes$values==5]))
tossvotes <- sum(labels1$Votes[labels1$values==4])

setDT(labels1)

hcl <- farver::decode_colour(CD_votes$color, "rgb", "hcl")
label_col <- ifelse(hcl[, "l"] > 50, "black", "white")

cols <- c(1 == "#0066CC", 2 == "#7FAEFF",3 == "#C5DBFC",4 == "#D1C7A8",
          5 == "#F4C1AB",6 == "#F88C79", 7 == "#E91D0E")

labels1[labels1$state_abbv2=="HI",] <- "" 

test4 %>%
  ggplot(aes(long, lat, group = group, fill = values)) +
  geom_polygon(color = NA) +
  coord_map() +
  labs(fill = "")+
  geom_path(color="white")+  theme_classic() +
  geom_text(data=labels1[labels1$state_abbv2!="",],aes(x=Cent_lon,y=Cent_lat, label=label), 
            size=6, inherit.aes=F, fontface = "bold",lineheight = 0.7,color="white")+ # the normal labels: 
    theme(legend.position = c(.45,0.1),
        legend.title=element_text(size=1),  # font size of the legend 
        legend.text=element_text(size=15),
        axis.title.x=element_blank(),  # remove axis, title, ticks
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),  # remove axis, title, ticks
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.line=element_blank(),
        legend.key.height= unit(.3, 'cm'),
        legend.key.width= unit(3, 'cm'),
        legend.box = "horizontal",
        legend.margin = margin(0, 0, 0, 0),
        legend.spacing.x = unit(0, "mm"),
        legend.justification = "center")+
  scale_fill_manual(values = c("1"="#0066CC","2"="#7FAEFF","3"="#C5DBFC","4"="#D1C7A8","5"="#F4C1AB","6"="#F88C79","7"="#E91D0E"),
                    labels = c("1"="Solid D\n>98 in 100","2"="Likely D\n>75 in 100",
                               "3"="Lean D\n>55 in 100","4"="Toss-up\nBoth <55 in 100",
                               "5"="Lean R\n>55 in 100","6"="Likely R\n>75 in 100",
                               "7"="Solid R\n>98 in 100"))+
  guides(fill = guide_legend(title.position = "top", 
                              hjust = 1, #centres the title horizontally
                              title.hjust = 0.5,
                              label.position = "bottom",nrow=1,byrow=F,size=6,reverse = T))+
  xlim(-130,-55)+ ylim(15,50)+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=30 , ymax=32,color="black", fill=lab_cols$color[lab_cols$abbr=="MA"])+
  annotate(geom="text",x=-62,y=31,label=lab_cols$labels[lab_cols$abbr=="MA"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=32 , ymax=34,color="black", fill=lab_cols$color[lab_cols$abbr=="RI"])+
  annotate(geom="text",x=-62,y=33,label=lab_cols$labels[lab_cols$abbr=="RI"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=34 , ymax=36,color="black", fill=lab_cols$color[lab_cols$abbr=="CT"])+
  annotate(geom="text",x=-62,y=35,label=lab_cols$labels[lab_cols$abbr=="CT"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=36 , ymax=38,color="black", fill=lab_cols$color[lab_cols$abbr=="NJ"])+
  annotate(geom="text",x=-62,y=37,label=lab_cols$labels[lab_cols$abbr=="NJ"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=38 , ymax=40,color="black", fill=lab_cols$color[lab_cols$abbr=="DE"])+
  annotate(geom="text",x=-62,y=39,label=lab_cols$labels[lab_cols$abbr=="DE"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=40 , ymax=42,color="black", fill=lab_cols$color[lab_cols$abbr=="MD"])+
  annotate(geom="text",x=-62,y=41,label=lab_cols$labels[lab_cols$abbr=="MD"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=42 , ymax=44,color="black", fill=lab_cols$color[lab_cols$abbr=="DC"])+
  annotate(geom="text",x=-62,y=43,label=lab_cols$labels[lab_cols$abbr=="DC"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=44 , ymax=46,color="black", fill=lab_cols$color[lab_cols$abbr=="NH"])+
  annotate(geom="text",x=-62,y=45,label=lab_cols$labels[lab_cols$abbr=="NH"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-58.5, xmax=-62.5, ymin=46 , ymax=48,color="black", fill=lab_cols$color[lab_cols$abbr=="VT"])+
  annotate(geom="text",x=-62,y=47,label=lab_cols$labels[lab_cols$abbr=="VT"],
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-68.8, xmax=-68, ymin=44.5 , ymax=45.4,color="white", fill=CD_votes$color[CD_votes$abbr=="ME D1"])+
  annotate(geom="text",x=-68.7,y=44.96,label="1",
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-67.8, xmax=-67, ymin=44.5 , ymax=45.4,color="white", fill=CD_votes$color[CD_votes$abbr=="ME D2"])+
  annotate(geom="text",x=-67.7,y=44.96,label="1",
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-99, xmax=-98.3, ymin=40.65 , ymax=41.55,color="white", fill=CD_votes$color[CD_votes$abbr=="NE D1"])+
  annotate(geom="text",x=-99,y=41.1,label="1",
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-98, xmax=-97.3, ymin=40.65 , ymax=41.55,color="white", fill=CD_votes$color[CD_votes$abbr=="NE D2"])+
  annotate(geom="text",x=-98,y=41.1,label="1",
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate("rect", xmin=-97, xmax=-96.3, ymin=40.65 , ymax=41.55,color="white", fill=CD_votes$color[CD_votes$abbr=="NE D3"])+
  annotate(geom="text",x=-97,y=41.1,label="1",
           hjust=0,fontface = "bold", size=6,color="white")+
  annotate(geom="text",x=-103,y=23,label="HI\n4",
           hjust=0,fontface = "bold", size=6,color="black",lineheight = 0.7)


ggsave("SimulationData/map_Fig2.pdf",width = 20, height = 10, dpi = 300, units = "in", device='pdf')
ggsave("SimulationData/map_Fig2.eps",width = 20, height = 8, dpi = 300, units = "in", device='eps')
ggsave("SimulationData/map_Fig2.png",width = 20, height = 8, dpi = 300, units = "in", device='png')

# Based on https://urban-institute.medium.com/how-to-create-state-and-county-maps-easily-in-r-577d29300bb2
# and https://liuyanguu.github.io/post/2019/04/17/ggplot-heatmap-us-50-states-map-and-china-province-map/
